In [ ]:
In [ ]:
import os
from collections import defaultdict
%matplotlib inline
import matplotlib.pyplot as plt
In [ ]:
import yaml
import re
def safe_yaml_read(fpath, replace_str=''):
    """
    Read a yaml file, stripping all of the jinja templating markup.

    Parameters
    ----------
    fpath : str
        Path to yaml file to sanitize
    replace_str : str
        String to replace the template markup with, defaults to ''.
        It is inserted literally (no regex back-reference expansion).

    Returns
    -------
    yaml_dict : dict
        The parsed yaml document with all of the jinja2 templating fields
        (``{{ ... }}`` and ``{% ... %}``) replaced with ``replace_str``.
        Whatever the yaml parser produces is returned as is, so an empty
        file yields ``None``.
    """
    # Non-greedy match so that several template fields on one line are
    # replaced one by one instead of swallowing the text between them.
    template_markup = re.compile(r'{[{%].*?[%}]}')
    with open(fpath, 'r') as f:
        # Markup is stripped line by line; a template field spanning
        # several lines is therefore left untouched.
        sanitized = ''.join(
            # A callable replacement keeps ``replace_str`` literal.
            template_markup.sub(lambda _match: replace_str, ln) for ln in f
        )
    # NOTE: the original called ``yaml.load`` without a Loader, which is
    # deprecated, rejected outright by PyYAML >= 6, and able to construct
    # arbitrary Python objects.  Recipe metadata is plain data, so the safe
    # loader is sufficient.
    return yaml.safe_load(sanitized)
In [ ]:
# Location of the recipe checkout, relative to the current user's home
# directory: ~/dev/conda/conda-prescriptions/recipes
recipes_path = os.path.join(
    os.path.expanduser('~'),
    *('dev', 'conda', 'conda-prescriptions', 'recipes')
)
In [ ]:
from yaml.parser import ParserError
# Collect every recipe as all_recipes[lib_name][version] = parsed meta.yaml.
# The library name and version are taken from the two innermost directory
# names of each folder that contains a meta.yaml file.
all_recipes = defaultdict(dict)
for parent_folder, child_folders, files in os.walk(recipes_path):
    if 'meta.yaml' not in files:
        # Not a recipe folder, keep walking.
        continue
    split_path = parent_folder.split(os.sep)
    lib_name, version = split_path[-2], split_path[-1]
    try:
        yaml_info = safe_yaml_read(os.path.join(parent_folder, 'meta.yaml'))
    except ParserError as pe:
        # Keep a placeholder so that the recipe still shows up in listings.
        print('%s::%s not parseable' % (lib_name, version))
        print('ParserError --> %s' % pe)
        yaml_info = None
    all_recipes[lib_name][version] = yaml_info

# remove python. we need to special case that one...
python_versions = all_recipes.pop('python')
In [ ]:
# Show every library together with its available recipe versions, both in
# sorted order.
for recipe in sorted(all_recipes):
    version = all_recipes[recipe]
    print(recipe, sorted(version))
Looks good! Next, we should separate these recipes into two groups:
`latest_tagged` and `dev_only`.
In [ ]:
def _version_sort_key(version):
    """
    Build a sort key that compares the numeric parts of a version by value.

    Plain string sorting puts '0.10.0' before '0.9.0'.  Splitting on runs of
    digits and comparing those runs as integers gives the expected order.
    Each chunk is tagged so that numbers are only ever compared with numbers
    and text with text; numbers sort before text.
    """
    # re.split with a capturing group alternates text, digits, text, ...
    # so the odd positions are always the digit runs.
    chunks = re.split(r'(\d+)', version)
    return [(0, int(chunk)) if idx % 2 else (1, chunk)
            for idx, chunk in enumerate(chunks) if chunk]


# For every library keep only its most recent recipe:
# latest_tagged[lib_name] = {version: parsed meta.yaml}
latest_tagged = defaultdict(dict)
for lib_name, all_versions in all_recipes.items():
    versions = sorted(all_versions, key=_version_sort_key)
    # 'dev' is not a tagged release; ignore it unless it is the only recipe
    # the library has.
    if len(versions) > 1 and 'dev' in versions:
        versions.remove('dev')
    version = versions[-1]
    latest_tagged[lib_name][version] = all_versions[version]
In [ ]:
# Show which single version was selected for every library, sorted by
# library name.
for recipe in sorted(latest_tagged):
    version = latest_tagged[recipe]
    print(recipe, sorted(version))
In [ ]:
# Map each library that ships a 'dev' recipe to that recipe's parsed
# meta.yaml; libraries without a 'dev' recipe are left out.
dev_only = defaultdict(dict)
for lib_name, all_versions in all_recipes.items():
    if 'dev' not in all_versions:
        continue
    dev_only[lib_name] = all_versions['dev']
In [ ]:
# Names of all libraries that have a 'dev' recipe, in sorted order.
print(sorted(dev_only))
In [ ]:
import networkx as nx
In [ ]:
def add_requirements(graph, requirements_list, target_lib):
    """
    Add a library and its requirements to a dependency graph, in place.

    ``target_lib`` is always added as a node.  Every requirement is added as
    a node with an edge pointing from the requirement to ``target_lib``.

    Parameters
    ----------
    graph : graph object
        Anything providing ``add_node(node)`` and ``add_edge(u, v)``;
        the callers in this file pass ``networkx.DiGraph`` instances.
    requirements_list : iterable or None
        The requirements of ``target_lib``.  ``None`` (what yaml yields for
        an empty section) is treated as "no requirements".
    target_lib : str
        Name of the library the requirements belong to.
    """
    graph.add_node(target_lib)
    for req in requirements_list or ():
        if req is None or req == '':
            # A requirement that consisted only of template markup is left
            # empty once the markup has been stripped; it names nothing.
            continue
        graph.add_node(req)
        graph.add_edge(req, target_lib)
In [ ]:
# One row of axes per dev recipe: build requirements in the left column, run
# requirements in the right column.  While drawing, the requirements of all
# recipes are also accumulated into two combined graphs.
n_rows = max(len(dev_only), 1)  # plt.subplots does not accept nrows=0
# squeeze=False keeps ``ax`` two-dimensional even when there is a single
# recipe, so that ``ax[row][col]`` is always valid.
fig, ax = plt.subplots(ncols=2, nrows=n_rows, figsize=(10, 4 * n_rows),
                       squeeze=False)
all_runs_dev_only = nx.DiGraph()
all_builds_dev_only = nx.DiGraph()
for row, (lib, meta) in enumerate(sorted(dev_only.items())):
    run = nx.DiGraph()
    build = nx.DiGraph()
    # ``meta`` is None for a recipe that could not be parsed, and a recipe
    # may omit the requirements section or leave build/run empty.  Treat all
    # of these as "no requirements" instead of failing.
    reqs = (meta or {}).get('requirements') or {}
    build_reqs = reqs.get('build') or []
    run_reqs = reqs.get('run') or []
    add_requirements(build, build_reqs, lib)
    add_requirements(run, run_reqs, lib)
    add_requirements(all_builds_dev_only, build_reqs, lib)
    add_requirements(all_runs_dev_only, run_reqs, lib)
    build_ax = ax[row][0]
    row_ax = ax[row][1]
    nx.draw_networkx(build, ax=build_ax)
    nx.draw_networkx(run, ax=row_ax)
    build_ax.set_title("%s Build requirements" % lib)
    row_ax.set_title("%s Run requirements" % lib)
In [ ]:
# Check that the combined run-requirement graph of the dev recipes is a
# directed acyclic graph, i.e. contains no circular requirements.
nx.is_directed_acyclic_graph(all_runs_dev_only)
In [ ]:
# Check that the combined build-requirement graph of the dev recipes is a
# directed acyclic graph, i.e. contains no circular requirements.
nx.is_directed_acyclic_graph(all_builds_dev_only)
In [ ]:
# One row of axes per library: build requirements of its latest recipe in the
# left column, run requirements in the right column.  While drawing, the
# requirements of all recipes are also accumulated into two combined graphs.
n_rows = max(len(latest_tagged), 1)  # plt.subplots does not accept nrows=0
# squeeze=False keeps ``ax`` two-dimensional even when there is a single
# library, so that ``ax[row][col]`` is always valid.
fig, ax = plt.subplots(ncols=2, nrows=n_rows, figsize=(10, 4 * n_rows),
                       squeeze=False)
all_runs_latest_tagged = nx.DiGraph()
all_builds_latest_tagged = nx.DiGraph()
for row, (lib, version) in enumerate(sorted(latest_tagged.items())):
    # Each entry of latest_tagged holds a single {version: meta} pair.
    meta = list(version.values())[0]
    run = nx.DiGraph()
    build = nx.DiGraph()
    build_ax = ax[row][0]
    row_ax = ax[row][1]
    build_ax.set_title("%s Build requirements" % lib)
    row_ax.set_title("%s Run requirements" % lib)
    # ``meta`` is None for a recipe that could not be parsed; such a recipe
    # has no requirements to draw, just like one without that section.
    reqs = meta.get('requirements') if meta else None
    if not reqs:
        continue
    build_reqs = reqs.get('build')
    run_reqs = reqs.get('run')
    if build_reqs:
        add_requirements(build, build_reqs, lib)
        add_requirements(all_builds_latest_tagged, build_reqs, lib)
        nx.draw_networkx(build, ax=build_ax)
    if run_reqs:
        add_requirements(run, run_reqs, lib)
        add_requirements(all_runs_latest_tagged, run_reqs, lib)
        nx.draw_networkx(run, ax=row_ax)
In [ ]:
# Check that the combined build-requirement graph of the latest recipes is a
# directed acyclic graph, i.e. contains no circular requirements.
nx.is_directed_acyclic_graph(all_builds_latest_tagged)
In [ ]:
# Check that the combined run-requirement graph of the latest recipes is a
# directed acyclic graph, i.e. contains no circular requirements.
nx.is_directed_acyclic_graph(all_runs_latest_tagged)
In [ ]:
# Draw the combined build-requirement graph of the dev recipes on a single
# 50 x 50 figure.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(50, 50))
nx.draw_networkx(all_builds_dev_only, ax=ax)
ax.set_title("All build requirements, dev recipes only")
In [ ]:
# Draw the combined run-requirement graph of the dev recipes on a single
# 50 x 50 figure.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(50, 50))
nx.draw_networkx(all_runs_dev_only, ax=ax)
ax.set_title("All runtime requirements, dev recipes only")
In [ ]:
# Draw the combined build-requirement graph of the latest recipes on a single
# 50 x 50 figure.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(50, 50))
nx.draw_networkx(all_builds_latest_tagged, ax=ax)
ax.set_title("All build requirements, latest tagged recipes")
In [ ]:
# Draw the combined run-requirement graph of the latest recipes on a single
# 50 x 50 figure.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(50, 50))
nx.draw_networkx(all_runs_latest_tagged, ax=ax)
ax.set_title("All runtime requirements, latest tagged recipes")
In [ ]:
# List every node (library or requirement name) of the latest-recipe
# run-requirement graph in sorted order.
sorted(all_runs_latest_tagged.nodes())
In [ ]:
# IPython-only syntax (not plain Python): a trailing `??` asks IPython to
# display the help/source of the graph's ``subgraph`` method.
all_runs_latest_tagged.subgraph??
In [ ]:
# Select the part of the latest-recipe run-requirement graph that belongs to
# 'dataportal'.
# NOTE(review): only the single name 'dataportal' is passed, so the result
# presumably holds just that node and none of its requirement edges. If the
# goal is dataportal's requirement tree, its requirements need to be passed
# as well (e.g. via nx.ancestors) -- confirm the intent.
g = all_runs_latest_tagged.subgraph('dataportal')
In [ ]:
# Draw the subgraph ``g``; no axes are passed explicitly here.
nx.draw_networkx(g)
In [ ]:
# Show all edges of the latest-recipe run-requirement graph. Edges are added
# as (requirement, library) pairs by add_requirements.
all_runs_latest_tagged.edges()
In [ ]:
In [ ]:
# Look up the 'clint' node in the latest-recipe run-requirement graph.
# NOTE(review): since edges point from a requirement to the library needing
# it, this should show the libraries that list 'clint' as a run requirement
# -- confirm against the networkx adjacency semantics.
all_runs_latest_tagged['clint']
In [ ]: